{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "eefccc7d-0b98-4973-97a7-e00eec35f36f",
   "metadata": {},
   "source": [
    "# How to get a report as a pandas DataFrame?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72de23b6-90fe-442f-ae54-415b2d5cd612",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from sklearn import datasets, ensemble, model_selection\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "from evidently.metrics import ClassificationQualityByClass \n",
    "from evidently.metrics import ClassificationQualityMetric \n",
    "from evidently.metrics import ColumnCorrelationsMetric \n",
    "from evidently.metrics import ColumnSummaryMetric \n",
    "from evidently.metrics import  DataDriftTable\n",
    "from evidently.metrics.base_metric import generate_column_metrics\n",
    "from evidently.report import Report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c387f28e-6ba5-4461-a1f7-e646d1b5469e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset for binary probabilistic classification\n",
    "bcancer_data = datasets.load_breast_cancer(as_frame=True)\n",
    "bcancer = bcancer_data.frame\n",
    "feature_columns = bcancer_data.feature_names.tolist()\n",
    "\n",
    "# Seed both draws so that Restart & Run All reproduces the same reference/current split;\n",
    "# the seeds differ so the two samples are not drawn from the same random sequence.\n",
    "bcancer_ref = bcancer.sample(n=300, replace=False, random_state=0)\n",
    "bcancer_cur = bcancer.sample(n=200, replace=False, random_state=1)\n",
    "\n",
    "# Snapshots of both samples taken before the 'prediction' column is added below\n",
    "bcancer_label_ref = bcancer_ref.copy(deep=True)\n",
    "bcancer_label_cur = bcancer_cur.copy(deep=True)\n",
    "\n",
    "model = ensemble.RandomForestClassifier(random_state=1, n_estimators=10)\n",
    "model.fit(bcancer_ref[feature_columns], bcancer_ref.target)\n",
    "\n",
    "# Store the predicted probability from column 1 of predict_proba as the 'prediction' column\n",
    "bcancer_ref['prediction'] = model.predict_proba(bcancer_ref[feature_columns])[:, 1]\n",
    "bcancer_cur['prediction'] = model.predict_proba(bcancer_cur[feature_columns])[:, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d839ec3e-6c81-4d02-a54f-ece77d810d31",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Metrics to include in the report, in the order they are passed to Report\n",
    "report_metrics = [\n",
    "    DataDriftTable(),\n",
    "    generate_column_metrics(ColumnSummaryMetric),\n",
    "    generate_column_metrics(ColumnCorrelationsMetric),\n",
    "    ClassificationQualityMetric(),\n",
    "    ClassificationQualityByClass(),\n",
    "]\n",
    "\n",
    "performance_report = Report(metrics=report_metrics)\n",
    "performance_report.run(reference_data=bcancer_ref, current_data=bcancer_cur)\n",
    "\n",
    "# Last expression of the cell: its result becomes the cell output\n",
    "performance_report.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1fc9306-9812-4ff2-9baa-997bf0f56d13",
   "metadata": {},
   "outputs": [],
   "source": [
    "# You can generate a dataframe for the whole report as well as for a single metric only.\n",
    "# Dataframe for a single metric:\n",
    "drift_table_df = performance_report.as_dataframe(\"DataDriftTable\")\n",
    "drift_table_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4fb12bc2-35fc-46e0-adbe-4a9b90040ddd",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# You can generate a dataframe for the whole report as well as for a single metric only.\n",
    "# Dataframe for the whole report (in this case you receive a dict of dataframes):\n",
    "report_df = performance_report.as_dataframe()\n",
    "report_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a80fd2c-9ae5-40c1-89e7-107af1fb8618",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For a dataset-level metric you always get a single dataframe as a result\n",
    "quality_by_class_table_df = performance_report.as_dataframe(\"ClassificationQualityByClass\")\n",
    "quality_by_class_table_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96ca2493-e872-4c8b-9c55-a22f32226a1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For several column-level metrics of the same type, we try to concatenate the results so that you also get a single dataframe\n",
    "summary_table_df = performance_report.as_dataframe(\"ColumnSummaryMetric\")\n",
    "summary_table_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
